This notebook will begin looking at clustering methods on the expression of the genes in a single sample of the dataset of interest, from an unbiased approach.
# Load libraries
library(magrittr)
library(scater)
library(readr)
library(bluster)
library(ggpubr)
library(pheatmap)
# Set file paths
data_dir <- file.path("results", "Gawad_processed_data")
# Source custom functions script
source(file.path("utils", "clustering-functions.R"))
sample_290_normalized <- read_rds(
file.path(data_dir, "SCPCS000216", "SCPCL000290_miQC_processed_sce.rds"))
# Perform k-means clustering
sample_290_normalized <- kmeans_clustering(
sample_290_normalized,
params_range = c(4:10),
step_size = 1
)
# grab column names with clustering results
kmeans_cols <- grep("kmeans", colnames(colData(sample_290_normalized)))
kmeans_cluster_names <- colnames(colData(sample_290_normalized)[, kmeans_cols])
# Plot k-means
kmeans_plot_list <- kmeans_cluster_names %>%
purrr::map(~ plotReducedDim(sample_290_normalized, dimred = "UMAP", colour_by = .x) +
theme_bw()+
theme(text = element_text(size = 22)))
cowplot::plot_grid(plotlist = kmeans_plot_list, ncol = 4)
# Perform graph-based walktrap clustering
sample_290_normalized <- graph_clustering(
sample_290_normalized,
params_range = c(5:25),
step_size = 5,
weighting_type = "rank",
cluster_function = "walktrap"
)
# grab column names with clustering results
walktrap_cols <- grep("walktrap", colnames(colData(sample_290_normalized)))
walktrap_cluster_names <- colnames(colData(sample_290_normalized)[, walktrap_cols])
# Plot
walktrap_plot_list <- walktrap_cluster_names %>%
purrr::map(~ plotReducedDim(sample_290_normalized, dimred = "UMAP", colour_by = .x) +
theme_bw() +
theme(text = element_text(size = 22)))
cowplot::plot_grid(plotlist = walktrap_plot_list, ncol = 3)
# Perform graph-based louvain clustering
sample_290_normalized <- graph_clustering(
sample_290_normalized,
params_range = c(5:25),
step_size = 5,
weighting_type = "jaccard",
cluster_function = "louvain"
)
# grab column names with clustering results
louvain_cols <- grep("walktrap", colnames(colData(sample_290_normalized)))
louvain_cluster_names <- colnames(colData(sample_290_normalized)[, louvain_cols])
# Plot
louvain_plot_list <- louvain_cluster_names %>%
purrr::map(~ plotReducedDim(sample_290_normalized, dimred = "UMAP", colour_by = .x) +
theme_bw() +
theme(text = element_text(size = 22)))
cowplot::plot_grid(plotlist = louvain_plot_list, ncol = 3)
# Check the k-means cluster validity stats for each of the clusters and return
# stats in a data frame
kmeans_stats_df <- create_metadata_stats_df(sample_290_normalized, c(4:10), 1, "kmeans")
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
# Preview the results
head(kmeans_stats_df)
# Summarize the stats and return in a data frame
kmeans_summary_stats_df <- summarize_clustering_stats(kmeans_stats_df)
`summarise()` has grouped output by 'cluster_names_column', 'cluster_type'. You can override using the `.groups` argument.
# Preview the summary results
head(kmeans_summary_stats_df)
# Plot individual cluster purity stats
kmeans_purity_plots <- plot_cluster_purity(kmeans_stats_df)
kmeans_purity_plots
# Plot individual cluster silhouette width stats
kmeans_silhouette_plots <- plot_cluster_silhouette_width(kmeans_stats_df)
kmeans_silhouette_plots
# Check the walktrap cluster validity stats for each of the clusters and return
# stats in a data frame
walktrap_stats_df <- create_metadata_stats_df(sample_290_normalized, c(5:25), 5, "walktrap")
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
# Preview the all stats results
head(walktrap_stats_df)
# Summarize the stats and return in a data frame
walktrap_summary_stats_df <- summarize_clustering_stats(walktrap_stats_df)
`summarise()` has grouped output by 'cluster_names_column', 'cluster_type'. You can override using the `.groups` argument.
# Preview the summary results
head(walktrap_summary_stats_df)
# Plot individual cluster purity stats
walktrap_purity_plots <- plot_cluster_purity(walktrap_stats_df)
walktrap_purity_plots
# Plot individual cluster silhouette width stats
walktrap_silhouette_plots <- plot_cluster_silhouette_width(walktrap_stats_df)
walktrap_silhouette_plots
# Check the louvain cluster validity stats for each of the clusters and return
# stats in a data frame
louvain_stats_df <- create_metadata_stats_df(sample_290_normalized, c(5:25), 5, "louvain")
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
# Preview the results
head(louvain_stats_df)
# Summarize the stats and return in a data frame
louvain_summary_stats_df <- summarize_clustering_stats(louvain_stats_df)
`summarise()` has grouped output by 'cluster_names_column', 'cluster_type'. You can override using the `.groups` argument.
# Preview the summary results
head(louvain_summary_stats_df)
# Plot individual cluster purity stats
louvain_purity_plots <- plot_cluster_purity(louvain_stats_df)
louvain_purity_plots
# Plot individual cluster silhouette width stats
louvain_silhouette_plots <- plot_cluster_silhouette_width(louvain_stats_df)
louvain_silhouette_plots
summary_stats_df_list <- list("walktrap" = walktrap_summary_stats_df,
"louvain" = louvain_summary_stats_df)
# purity summary plot
plot_avg_validity_stats(summary_stats_df_list, "avg_purity")
#silhouette width summary plot
plot_avg_validity_stats(summary_stats_df_list, "avg_width")
# Check cluster stability
kmeans_ari_df <- get_cluster_stability_summary(sample_290_normalized, c(4:10), 1, "kmeans")
kmeans_ari_df
# plot cluster stability ARI values
plot_cluster_stability_ari(kmeans_ari_df)
walktrap_ari_df <- get_cluster_stability_summary(sample_290_normalized, c(5:25), 5, cluster_type = "graph", cluster_function = "walktrap")
walktrap_ari_df
# plot cluster stability ARI values
plot_cluster_stability_ari(walktrap_ari_df)
louvain_ari_df <- get_cluster_stability_summary(sample_290_normalized, c(5:25), 5, cluster_type = "graph", cluster_function = "louvain")
louvain_ari_df
# plot cluster stability ARI values
plot_cluster_stability_ari(louvain_ari_df)
sessionInfo()
R version 4.1.2 (2021-11-01)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.3 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
locale:
[1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8 LC_COLLATE=C.UTF-8
[5] LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8 LC_PAPER=C.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] pheatmap_1.0.12 ggpubr_0.4.0 bluster_1.4.0
[4] readr_2.1.1 scater_1.22.0 ggplot2_3.3.5
[7] scuttle_1.4.0 SingleCellExperiment_1.16.0 SummarizedExperiment_1.24.0
[10] Biobase_2.54.0 GenomicRanges_1.46.1 GenomeInfoDb_1.30.0
[13] IRanges_2.28.0 S4Vectors_0.32.3 BiocGenerics_0.40.0
[16] MatrixGenerics_1.6.0 matrixStats_0.61.0 magrittr_2.0.1
loaded via a namespace (and not attached):
[1] bitops_1.0-7 RColorBrewer_1.1-2 tools_4.1.2 backports_1.4.0
[5] utf8_1.2.2 R6_2.5.1 irlba_2.3.3 vipor_0.4.5
[9] DBI_1.1.1 colorspace_2.0-2 nnet_7.3-17 withr_2.4.3
[13] tidyselect_1.1.1 gridExtra_2.3 compiler_4.1.2 cli_3.1.0
[17] BiocNeighbors_1.12.0 DelayedArray_0.20.0 labeling_0.4.2 scales_1.1.1
[21] digest_0.6.29 rmarkdown_2.11 XVector_0.34.0 pkgconfig_2.0.3
[25] htmltools_0.5.2 sparseMatrixStats_1.6.0 fastmap_1.1.0 rlang_0.4.12
[29] rstudioapi_0.13 DelayedMatrixStats_1.16.0 farver_2.1.0 generics_0.1.1
[33] BiocParallel_1.28.2 dplyr_1.0.7 car_3.0-12 RCurl_1.98-1.5
[37] BiocSingular_1.10.0 modeltools_0.2-23 GenomeInfoDbData_1.2.7 Matrix_1.4-1
[41] Rcpp_1.0.7 ggbeeswarm_0.6.0 munsell_0.5.0 fansi_0.5.0
[45] abind_1.4-5 viridis_0.6.2 lifecycle_1.0.1 yaml_2.2.1
[49] carData_3.0-4 MASS_7.3-57 zlibbioc_1.40.0 flexmix_2.3-17
[53] grid_4.1.2 parallel_4.1.2 ggrepel_0.9.1 crayon_1.4.2
[57] lattice_0.20-45 cowplot_1.1.1 beachmat_2.10.0 splines_4.1.2
[61] hms_1.1.1 knitr_1.36 pillar_1.6.4 igraph_1.2.9
[65] ggsignif_0.6.3 ScaledMatrix_1.2.0 magic_1.6-0 pdfCluster_1.0-3
[69] glue_1.5.1 evaluate_0.14 renv_0.14.0 tweenr_1.0.2
[73] vctrs_0.3.8 tzdb_0.2.0 polyclip_1.10-0 gtable_0.3.0
[77] purrr_0.3.4 tidyr_1.1.4 assertthat_0.2.1 ggforce_0.3.3
[81] xfun_0.28 rsvd_1.0.5 broom_0.7.10 miQC_1.2.0
[85] rstatix_0.7.0 viridisLite_0.4.0 geometry_0.4.6 tibble_3.1.6
[89] beeswarm_0.4.0 cluster_2.1.3 ellipsis_0.3.2